# descriptives

#Set Directory
# Two candidate locations of the project folder are listed; the first one that
# exists is used. (Calling setwd() on each path in turn stops the script with an
# error as soon as one of them is missing.) 'My Drive' is tried first because it
# was the location that took effect last when every path existed.
dataDirs <- c(
  '/Users/drschaef/My Drive/Projects/Adriana/Raw data/Longitudinal Data/',
  '/Users/drschaef/Google Drive/Projects/Adriana/Raw data/Longitudinal Data/'
)
dataDir <- dataDirs[dir.exists(dataDirs)][1]
if (is.na(dataDir)) {
  stop('None of the data directories exist: ', paste(dataDirs, collapse=' ; '), call.=FALSE)
}
setwd(dataDir)

library(network)
library(sna)
library(statnet)
library(ggplot2)


### M_ data (N=1795 in raw files)
# Attribute file plus race file (first column of the race file is dropped).
dat <- data.frame(readRDS('attrNoRaceM_220523.rds'),readRDS('attrRaceM_220829_noAMENA.rds')[,-1])
# NOTE: mat1-mat3, eca1, eca2, skin and dat are reused as names for the S_ data
# below, so after this section runs they hold the S_ versions.
mat1 <-  readRDS("M_W1.network.rds")
mat2 <-  readRDS("M_W2.network.rds")
mat3 <-  readRDS("M_W3.network.rds")
eca1 <- readRDS("ecaM1_220608.rds")
eca2 <- readRDS("ecaM2_220608.rds")
skin <- readRDS("skinM_.rds") 

# append skin tone measures to dat (left join: keep every row of dat)
datM <- merge(x=dat, y=skin, all.x=TRUE, all.y=FALSE, by.x='MID', by.y='MFID')

### S_ data (N=3298 in raw files) 
dat <- data.frame(readRDS('attrNoRaceS_220523.rds'),readRDS('attrRaceS_220829_noAMENA.rds')[,-1])
mat1 <-  readRDS("S_W1.network.rds")
mat2 <-  readRDS("S_W2.network.rds")
mat3 <-  readRDS("S_W3.network.rds")
eca1 <- readRDS("ecaA1_220608.rds")
eca2 <- readRDS("ecaA2_220608.rds")
skin <- readRDS("skinS_.rds")

# append skin tone measures to dat (left join: keep every row of dat)
datA <- merge(x=dat, y=skin, all.x=TRUE, all.y=FALSE, by.x='MID', by.y='MFID')

# school 1 = M_ data, school 2 = S_ data
datL <- list(datM,datA)

# FUNCTION: biracial student current identification compared to chance
#
# For one school, takes the students with exactly two heritage categories and,
# for each of the three chooseOne columns (chooseOne1..chooseOne3), tabulates
# which category they picked, by heritage pair.
#
# Args:
#   SCHOOL: index into the global list datL.
#
# Returns:
#   A list of three data frames (one per chooseOne column). Columns:
#     raceFirst, raceSecond  heritage pair (1=A, 2=B, 3=L, 4=W, 5=N, 6=O)
#     datS.chooseOne<j>      category chosen (8 is recoded to 6)
#     x                      number of students in the pair making that choice
#     X                      number of students in the pair (valid choices only)
#     px, se, CIlow, CIhi    x / X with its normal-approximation 95% interval
#     seNH, CIlowNH, CIhiNH  95% interval around .5 for a sample of size X
#     sig                    TRUE when px falls outside [CIlowNH, CIhiNH]
#   Every row appears twice: once as computed and once with raceFirst and
#   raceSecond swapped, so both orderings of a pair can be looked up.
calcBiChoice <- function(SCHOOL) {
  datS <- datL[[SCHOOL]]  # pick a school
  dat <- datS[, which(names(datS) == "A"):which(names(datS) == "O")]  # heritage columns

  # Logical matrix (students x 6 heritage columns): TRUE where heritage is marked.
  isHer <- dat[, 1:6] == 1
  marked <- lapply(seq_len(nrow(dat)), function(i) which(unname(isHer[i, ])))
  # raceSecond is the lowest-numbered marked category and raceFirst the next one
  # (NA when fewer than two are marked).
  dat$raceFirst <- vapply(marked, function(v) v[2], integer(1))
  dat$raceSecond <- vapply(marked, function(v) v[1], integer(1))
  dat$nRace <- as.integer(rowSums(isHer))  # NA when any heritage value is missing

  datMoBi <- data.frame(dat,
                        datS.chooseOne1 = datS$chooseOne1,
                        datS.chooseOne2 = datS$chooseOne2,
                        datS.chooseOne3 = datS$chooseOne3)
  datMoBi <- datMoBi[datMoBi$nRace <= 2, ]
  datMoBi[datMoBi == 8] <- 6                 # code other into 6
  # single-heritage students: use their only category as raceFirst too
  noFirst <- is.na(datMoBi$raceFirst)
  datMoBi$raceFirst[noFirst] <- datMoBi$raceSecond[noFirst]
  # 1=A, 2=B, 3=L, 4=W, 5=N, 6=O
  datBi <- datMoBi[datMoBi$nRace == 2, ]  # only biracial heritage students

  summariseWave <- function(j) {
    choiceCol <- paste0('datS.chooseOne', j)
    valid <- datBi[!is.na(datBi[[choiceCol]]), ]  # reduce data to valid chooseOne_
    valid <- valid[valid[[choiceCol]] < 9, ]      # exclude multi category
    # count number with each choice, then the total per heritage pair
    counts <- aggregate(valid[, 1], by = valid[c('raceFirst', 'raceSecond', choiceCol)], length)
    totals <- aggregate(counts$x, by = counts[1:2], sum)
    names(totals)[3] <- 'X'
    out <- merge(x = counts, y = totals, all.x = TRUE)
    out$px <- out$x / out$X
    out$se <- ((out$px * (1 - out$px)) / out$X)^.5
    out$CIlow <- out$px - (out$se * 1.96)
    out$CIhi <- out$px + (out$se * 1.96)
    out$seNH <- (.25 / out$X)^.5   # NH=null hypothesis calculation of SE (around p=.5)
    out$CIlowNH <- .5 - (out$seNH * 1.96)
    out$CIhiNH <- .5 + (out$seNH * 1.96)
    out$sig <- out$px > out$CIhiNH | out$px < out$CIlowNH
    mirrored <- out   # create cells above the diagonal
    mirrored[, 1] <- out[, 2]
    mirrored[, 2] <- out[, 1]
    rbind(out, mirrored)
  }

  lapply(1:3, summariseWave)
}
# Run the calculation for both schools: biChoices[[school]][[wave]] is one table
biChoices <- lapply(1:2, calcBiChoice)
#biChoices[[2]][[1]]

# function to plot bi choices SEPARATE figures 
#
# Builds one ggplot for a single school and wave from the global biChoices list.
# Each of the 30 ordered heritage pairs gets one horizontal band; the point is the
# proportion choosing raceFirst and the grey bar is the 95% interval around .5.
# Pairs with fewer than 10 students (X < 10) are left blank.
#
# Args:
#   SCH:  school index into biChoices
#   WAVE: wave index (1-3) within that school
# Returns:
#   the ggplot object (it prints when the call is evaluated at top level)
#
# NOTE: no par(mar=...) call is made here; ggplot2 draws with grid, so base
# graphics margins do not affect the figure (plot.margin below sets the margins).
plotBi <- function(SCH,WAVE) {
  tab <- biChoices[[SCH]][[WAVE]]
  # keep rows whose chosen category (column 3) equals raceFirst (column 1)
  tab <- tab[tab[,1]==tab[,3],]
  tab2 <- tab[tab$X >= 10,]
  # all 30 ordered pairs, so pairs without data still occupy a band
  tab30 <- data.frame(raceFirst=rep(1:6, 6), raceSecond=rep(1:6, each=6))
  tab30 <- tab30[tab30$raceFirst != tab30$raceSecond,]
  tab3 <- merge(x=tab30, y=tab2, all.x=TRUE)
  # Remember which pairs have data BEFORE filling the gaps with .5; testing
  # px != .5 afterwards would also hide an observed proportion of exactly .5.
  hasData <- !is.na(tab3$px)
  tab3[is.na(tab3)] <- .5   # empty pairs: zero-width interval at .5
  tab3$ymin <- 0:(dim(tab3)[1]-1)
  tab3$ymax <- 1:(dim(tab3)[1])
  #tab3$label <- c('','','Asian','','','','','Black','','','','','Latino','','','','','White','','','','','Nat. Am.','','','','','Other','','')
  raceCols <- c('red','black','green','blue','orange','pink')
  # left half: one colour block per raceFirst group (5 bands each)
  rec1 <- data.frame(xmin=rep(0,6), xmax=.5, ymin=(0:5)*5, ymax=(1:6)*5, col=raceCols)
  # right half: one colour per band for raceSecond (own-race cells removed)
  rec2 <- data.frame(xmin=rep(.5,6), xmax=1, ymin=(0:29), ymax=(1:30), 
    col= rep(raceCols,6)[-c(1,8,15,22,29,36)] )
  ggplot(tab3, aes(px, (ymin+.5))) + 
    geom_rect(data=tab3, inherit.aes=FALSE,
              aes(xmin= CIlowNH,xmax= CIhiNH,ymin=ymin,ymax=ymax,
                  fill = 'black'), alpha=0.6)+
    geom_point(size=hasData*2, shape=19) +   # size 0 hides pairs without data
    scale_y_continuous(limits = c(0,30), expand = c(0, 0), breaks = seq(0,25,5)+2.5, labels =c("Asian", "Black", "Latino", "White", "Nat. Am.", "Other") ) +
    scale_x_continuous(limits = c(0,1), expand = c(0, 0)) +
    scale_color_manual(values=c("red", "gray55"))+
    scale_fill_manual(values=c("gray50")) +
    guides(fill="none") +
    geom_hline(yintercept=(1:5)*5) +
    geom_vline(xintercept=.5) +
    theme_classic() +
    annotate("rect", xmin = rec1$xmin, xmax = rec1$xmax, 
             ymin = rec1$ymin, ymax = rec1$ymax, alpha = .2, fill = rec1$col) +
    annotate("rect", xmin = rec2$xmin, xmax = rec2$xmax, 
             ymin = rec2$ymin, ymax = rec2$ymax, alpha = .2, fill = rec2$col) +
    ggtitle(paste('School ',SCH, ', Wave ',WAVE, sep='')) +
    theme(plot.title = element_text(hjust = 0.5)) +
    xlab("Proportion Choosing Race") +
    ylab('') +
    theme(plot.margin = margin(.5,.5,.5,.75, "cm") ) +
    theme(axis.text.y = element_text(size=12))  +
    theme(axis.text.x = element_text(size=12))
}
# One figure per school (first argument) and wave (second argument).
# Each call is its own top-level expression, so the ggplot object it returns
# is printed when the line is run interactively or via Rscript.
plotBi(1,1)  
plotBi(1,2)  
plotBi(1,3)  
plotBi(2,1)  
plotBi(2,2)  
plotBi(2,3)  
 

# plot bi choices SINGLE figures 
# -function to prepare table for plotting
#
# Args:
#   TAB: one table from biChoices (columns 1-3 are raceFirst, raceSecond and the
#        chosen category; must also contain X).
# Returns:
#   A 30-row data frame, one row per ordered heritage pair, in plotting order
#   (bottom to top), with ymin/ymax giving each row's band. Pairs with no row in
#   TAB, or with X < 10, have NA in every column that comes from TAB.
prepTab <- function(TAB) {
  # rows where the chosen category equals raceFirst, with at least 10 students
  choseFirst <- TAB[TAB[, 1] == TAB[, 3], ]
  enoughN <- choseFirst[choseFirst$X >= 10, ]

  # all ordered pairs of the 6 categories, excluding a category with itself
  allPairs <- data.frame(raceFirst = rep(1:6, times = 6), raceSecond = rep(1:6, each = 6))
  allPairs <- allPairs[allPairs$raceFirst != allPairs$raceSecond, ]

  # merge() returns the rows sorted by raceFirst, then raceSecond
  filled <- merge(x = allPairs, y = enoughN, all.x = TRUE)

  # Plotting order: five rows per raceFirst group, groups in the order 5, 6, 1, 3, 4, 2.
  # Any axis labels must follow this same order.
  plotOrder <- c(
    25, 21, 23, 24, 22,   # raceFirst = 5
    30, 26, 28, 29, 27,   # raceFirst = 6
    4, 5, 2, 3, 1,        # raceFirst = 1
    14, 15, 11, 13, 12,   # raceFirst = 3
    19, 20, 16, 18, 17,   # raceFirst = 4
    9, 10, 6, 8, 7        # raceFirst = 2
  )
  filled <- filled[plotOrder, ]

  nBands <- nrow(filled)
  filled$ymin <- seq_len(nBands) - 1L
  filled$ymax <- seq_len(nBands)
  filled
}

# One prepared table per school x wave (tab-tab3: school 1, tab4-tab6: school 2)
tab <- prepTab(biChoices[[1]][[1]])
tab2 <- prepTab(biChoices[[1]][[2]])
tab3 <- prepTab(biChoices[[1]][[3]])
tab4 <- prepTab(biChoices[[2]][[1]])
tab5 <- prepTab(biChoices[[2]][[2]])
tab6 <- prepTab(biChoices[[2]][[3]])

# Row labels, "<raceSecond> - <raceFirst>", built from the prepared table itself so
# they always follow the row order that prepTab() produces (a hand-typed vector
# can drift out of step with that order).
# 1=A, 2=B, 3=L, 4=W, 5=N, 6=O
raceNames <- c('Asian','Black','Latino','White','Nat. Am.','Other')
labels30 <- paste(raceNames[tab$raceSecond], raceNames[tab$raceFirst], sep=' - ')

#colors6 <- c('pink','blue','orange','green','black','red')
colors6 <- c('orange','pink','red','green','blue','black')
rec1 <- data.frame(xmin=rep(.5,6), xmax=1, ymin=(0:5)*5, ymax=(1:6)*5, col=colors6)
rec2 <- data.frame(xmin=rep(0,6), xmax=.5, ymin=(0:29), ymax=(1:30), col= rep(colors6,6)[-c(1,8,15,22,29,36)] )
par(mar=(c(3,3,2,2)))

# Six point layers, one per school x wave, stacked inside each band.
# Each layer's size uses that layer's own table: 2 where px is present, 0 where missing.
# shape: school 1 = 16 if sig, else 1; school 2 = 17 if sig, else 2.
ggplot() +
  geom_point(data=tab, aes(x=px, y=(ymin+.8)), size=(!is.na(tab$px))*2, shape= (tab$sig*15+1) ) +
  geom_vline(aes(xintercept=.50), col='gray90') +
  geom_point(data=tab2, aes(x=px, y=(ymin+.68)), size=(!is.na(tab2$px))*2, shape= (tab2$sig*15+1) ) +
  geom_point(data=tab3, aes(x=px, y=(ymin+.56)), size=(!is.na(tab3$px))*2, shape= (tab3$sig*15+1) ) +
  geom_point(data=tab4, aes(x=px, y=(ymin+.44)), size=(!is.na(tab4$px))*2, shape= (tab4$sig*15+2) ) +
  geom_point(data=tab5, aes(x=px, y=(ymin+.32)), size=(!is.na(tab5$px))*2, shape= (tab5$sig*15+2) ) +
  geom_point(data=tab6, aes(x=px, y=(ymin+.2)), size=(!is.na(tab6$px))*2, shape= (tab6$sig*15+2) ) +
#  scale_y_continuous(limits=c(0,30), expand=c(0, 0), breaks=seq(0,25,5)+2.5, labels=c("Other","White","Nat. Am.", "Latino", "Black","Asian") ) +
  scale_y_continuous(limits=c(0,30), expand=c(0, 0), breaks=seq(0,29)+.5, labels=labels30 ) +
  scale_x_continuous(limits = c(0,1), expand = c(0, 0)) +
  guides(fill="none") +
  geom_vline(aes(xintercept=.50), col='gray90') +
  geom_hline(aes(yintercept=1:29), col='gray90') +
  geom_hline(yintercept=(1:5)*5) +
  theme_classic() +
#  annotate("rect", xmin = rec1$xmin, xmax = rec1$xmax, 
#           ymin = rec1$ymin, ymax = rec1$ymax, alpha = .2, fill = rec1$col) +
#  annotate("rect", xmin = rec2$xmin, xmax = rec2$xmax, 
#           ymin = rec2$ymin, ymax = rec2$ymax, alpha = .2, fill = rec2$col) +
#  ggtitle('Proportion of Biracial Youth Choosing Race Listed Second as Their Current Identification') +
  theme(plot.title = element_text(hjust = 0.5)) +
  xlab("\nProportion Choosing Race Listed Second as Current Identification") +
  ylab('') +
  theme(plot.margin = margin(.5,.5,.5,.75, "cm"), panel.border = element_rect(colour = "black", fill=NA, size=1.5) ) +
  theme(axis.text.y = element_text(size=12))  +
  theme(axis.text.x = element_text(size=12), axis.title.x = element_text(size = 12) ) 

# trying to move some labels to right side (231203)
# Left axis: raceSecond of each row; right axis: the raceFirst group (5 rows each).
# The left labels are taken from the prepared table so they follow its row order.
# 1=A, 2=B, 3=L, 4=W, 5=N, 6=O
raceNamesB <- c('Asian','Black','Latino/a','White','Nat. Am.','Other')
labels30b <- raceNamesB[tab$raceSecond]

# Each layer's size uses that layer's own table: 2 where px is present, 0 where missing.
# shape: school 1 = 16 if sig, else 1; school 2 = 17 if sig, else 2.
ggplot() +
  geom_point(data=tab, aes(x=px, y=(ymin+.8)), size=(!is.na(tab$px))*2, shape= (tab$sig*15+1) ) +
  geom_vline(aes(xintercept=.50), col='gray90') +
  geom_point(data=tab2, aes(x=px, y=(ymin+.68)), size=(!is.na(tab2$px))*2, shape= (tab2$sig*15+1) ) +
  geom_point(data=tab3, aes(x=px, y=(ymin+.56)), size=(!is.na(tab3$px))*2, shape= (tab3$sig*15+1) ) +
  geom_point(data=tab4, aes(x=px, y=(ymin+.44)), size=(!is.na(tab4$px))*2, shape= (tab4$sig*15+2) ) +
  geom_point(data=tab5, aes(x=px, y=(ymin+.32)), size=(!is.na(tab5$px))*2, shape= (tab5$sig*15+2) ) +
  geom_point(data=tab6, aes(x=px, y=(ymin+.2)), size=(!is.na(tab6$px))*2, shape= (tab6$sig*15+2) ) +
  scale_y_continuous(limits=c(0,30), expand=c(0, 0), breaks=seq(0,29)+.5, labels=labels30b, 
  	sec.axis=sec_axis(~ . * 1, name='', breaks=seq(0,5)*5+2.5, labels=c("Nat. Am.","Other","Asian","Latino/a","White","Black"))) +
  scale_x_continuous(limits = c(0,1), expand = c(0, 0)) +
  guides(fill="none") +
  geom_vline(aes(xintercept=.50), col='gray90') +
  geom_hline(aes(yintercept=1:29), col='gray90') +
  geom_hline(yintercept=(1:5)*5) +
  theme_classic() +
  theme(plot.title = element_text(hjust = 0.5)) +
  xlab("\nProportion Claiming Race Listed on Right as Primary Identity") +
  ylab('') +
  theme(plot.margin = margin(.5,.5,.5,.75, "cm"), panel.border = element_rect(colour = "black", fill=NA, size=1.5) ) +
  theme(axis.text.y = element_text(size=12))  +
  theme(axis.text.x = element_text(size=12), axis.title.x = element_text(size = 12) ) +
  theme(axis.ticks.y = element_line(color = NA), axis.ticks.length = unit(5, "points"))

  
  